import json
import random
from urllib.request import urlopen
# storing and anaysis
import numpy as np
import pandas as pd
# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import calmap
import folium
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# hide warnings
import warnings
warnings.filterwarnings('ignore')
!pip install dash
Collecting dash Downloading dash-2.4.1-py3-none-any.whl (9.8 MB) Collecting dash-core-components==2.0.0 Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB) Collecting dash-html-components==2.0.0 Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB) Requirement already satisfied: plotly>=5.0.0 in d:\anaconda1\lib\site-packages (from dash) (5.6.0) Requirement already satisfied: Flask>=1.0.4 in d:\anaconda1\lib\site-packages (from dash) (1.1.2) Collecting flask-compress Downloading Flask_Compress-1.12-py3-none-any.whl (7.9 kB) Collecting dash-table==5.0.0 Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB) Requirement already satisfied: click>=5.1 in d:\anaconda1\lib\site-packages (from Flask>=1.0.4->dash) (8.0.4) Requirement already satisfied: Werkzeug>=0.15 in d:\anaconda1\lib\site-packages (from Flask>=1.0.4->dash) (2.0.3) Requirement already satisfied: itsdangerous>=0.24 in d:\anaconda1\lib\site-packages (from Flask>=1.0.4->dash) (2.0.1) Requirement already satisfied: Jinja2>=2.10.1 in d:\anaconda1\lib\site-packages (from Flask>=1.0.4->dash) (2.11.3) Requirement already satisfied: colorama in d:\anaconda1\lib\site-packages (from click>=5.1->Flask>=1.0.4->dash) (0.4.4) Requirement already satisfied: MarkupSafe>=0.23 in d:\anaconda1\lib\site-packages (from Jinja2>=2.10.1->Flask>=1.0.4->dash) (2.0.1) Requirement already satisfied: tenacity>=6.2.0 in d:\anaconda1\lib\site-packages (from plotly>=5.0.0->dash) (8.0.1) Requirement already satisfied: six in d:\anaconda1\lib\site-packages (from plotly>=5.0.0->dash) (1.16.0) Collecting brotli Downloading Brotli-1.0.9-cp39-cp39-win_amd64.whl (383 kB) Installing collected packages: brotli, flask-compress, dash-table, dash-html-components, dash-core-components, dash Successfully installed brotli-1.0.9 dash-2.4.1 dash-core-components-2.0.0 dash-html-components-2.0.0 dash-table-5.0.0 flask-compress-1.12
full_table = pd.read_table(r'D:\dwon\valeursfoncieres-2021.txt',sep='|',parse_dates=['Date mutation'])
full_table.head()
| Code service CH | Reference document | 1 Articles CGI | 2 Articles CGI | 3 Articles CGI | 4 Articles CGI | 5 Articles CGI | No disposition | Date mutation | Nature mutation | ... | Surface Carrez du 5eme lot | Nombre de lots | Code type local | Type local | Identifiant local | Surface reelle bati | Nombre pieces principales | Nature culture | Nature culture speciale | Surface terrain | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 | 2021-05-01 | Vente | ... | NaN | 0 | 1.0 | Maison | NaN | 97.0 | 5.0 | S | NaN | 2410.0 |
| 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 | 2021-05-01 | Vente | ... | NaN | 0 | 3.0 | Dépendance | NaN | 0.0 | 0.0 | S | NaN | 2410.0 |
| 2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 | 2021-06-01 | Vente | ... | NaN | 0 | NaN | NaN | NaN | NaN | NaN | BT | NaN | 530.0 |
| 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 | 2021-04-01 | Vente | ... | NaN | 0 | 1.0 | Maison | NaN | 88.0 | 4.0 | S | NaN | 866.0 |
| 4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1 | 2021-06-01 | Vente | ... | NaN | 0 | 1.0 | Maison | NaN | 168.0 | 5.0 | S | NaN | 1426.0 |
5 rows × 43 columns
full_table.isna().sum()
No disposition 0 Date mutation 0 Nature mutation 0 Valeur fonciere 32780 No voie 1189446 B/T/Q 3219148 Type de voie 1275146 Code voie 31951 Voie 32386 Code postal 32050 Commune 0 Code departement 0 Code commune 0 Prefixe de section 3255458 Section 168 No plan 0 No Volume 3371789 1er lot 2328039 Surface Carrez du 1er lot 3068652 2eme lot 3067321 Surface Carrez du 2eme lot 3280272 3eme lot 3322900 Surface Carrez du 3eme lot 3368183 4eme lot 3360936 Surface Carrez du 4eme lot 3376497 5eme lot 3371401 Surface Carrez du 5eme lot 3378296 Nombre de lots 0 Code type local 1365323 Type local 1365323 Identifiant local 3379232 Surface reelle bati 1367180 Nombre pieces principales 1367180 Nature culture 1065291 Nature culture speciale 3238191 Surface terrain 1065291 nature culture totale 3238191 dtype: int64
# on recuperer uniquement les colonne qui ont moins de 10% de nan
clear_table=full_table[["Date mutation","Nature mutation","Valeur fonciere","Type de voie","Nombre de lots","Commune","Code departement","No plan","Code type local","Surface reelle bati","Nature culture","Nature culture speciale","Surface terrain"]]
clear_table.head()
| Date mutation | Nature mutation | Valeur fonciere | Type de voie | Nombre de lots | Commune | Code departement | No plan | Code type local | Surface reelle bati | Nature culture | Nature culture speciale | Surface terrain | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2021-05-01 | Vente | 185000,00 | CHE | 0 | VAL-REVERMONT | 1 | 122 | 1.0 | 97.0 | S | NaN | 2410.0 |
| 1 | 2021-05-01 | Vente | 185000,00 | CHE | 0 | VAL-REVERMONT | 1 | 122 | 3.0 | 0.0 | S | NaN | 2410.0 |
| 2 | 2021-06-01 | Vente | 10,00 | NaN | 0 | BEY | 1 | 204 | NaN | NaN | BT | NaN | 530.0 |
| 3 | 2021-04-01 | Vente | 204332,00 | ALL | 0 | BUELLAS | 1 | 1325 | 1.0 | 88.0 | S | NaN | 866.0 |
| 4 | 2021-06-01 | Vente | 320000,00 | RTE | 0 | MONTAGNAT | 1 | 11 | 1.0 | 168.0 | S | NaN | 1426.0 |
table=clear_table.copy()
test=table.groupby(['Date mutation'])['Nature mutation'].count()
test.plot()
<AxesSubplot:xlabel='Date mutation'>
df["Date mutation"] = pd.to_datetime(df["Date mutation"])
temp=df.groupby(['Date mutation'])['Valeur fonciere'].sum()
plt.figure(figsize=(20, 5))
ax = calmap.yearplot(temp, fillcolor='white', cmap='Oranges', linewidth=0.5)
On remarque une baisse significative des taux de creedit immobilier vers juillet 2021 impliquant une facilité pour les particulier de souscraire un pret pour acheter un bien impliquant donc ce pique siginificatif
geo=json.load(open("D:/dwon/projetpython/region.geojson"))
francecoord=folium.Map(location=[48.856578,2.351828],zoom_start=5.5)
clear_table['Code departement']=clear_table['Code departement'].astype(str)
table=clear_table.groupby(['Code departement'])['Nature mutation'].count()
francecoord.choropleth(geo_data=geo, key_on="feature.properties.code", data=table,columns=['Code departement','Nature mutation']
,fill_color="Spectral")
francecoord
On remarque donc qu'en 2021 les zones aux alentours des grandes villes reste les plus attractive impliquant donc un nombre plus conséquent de vente sur l'annnée
NM=clear_table['Nature mutation'].value_counts()
from matplotlib import pyplot
pyplot.axis('equal')
NM.plot(kind='pie',legend=True,figsize=(20,20))
<AxesSubplot:ylabel='Nature mutation'>
clear_table['Nature mutation'].value_counts()
Vente 3136416 Vente en l'état futur d'achèvement 193014 Echange 31564 Vente terrain à bâtir 9617 Adjudication 5941 Expropriation 2680 Name: Nature mutation, dtype: int64
tablese=clear_table.copy()
tablese.rename(columns = {'Nature mutation':'NM'},inplace = True)
newdf = tablese[(tablese.NM == "Expropriation")]
francexpr=folium.Map(location=[48.856578,2.351828],zoom_start=5.5)
newdf['Code departement']=newdf['Code departement'].astype(str)
table=newdf.groupby(['Code departement'])['NM'].count()
francexpr.choropleth(geo_data=geo, key_on="feature.properties.code", data=table,columns=['Code departement','NM']
,fill_color="Spectral")
francexpr
tablese=clear_table.copy()
tablese.rename(columns = {'Nombre de lots':'ndl'},inplace = True)
newdf = tablese[(tablese.ndl>0)]
francexpr=folium.Map(location=[48.856578,2.351828],zoom_start=5.5)
newdf['Code departement']=newdf['Code departement'].astype(str)
table=newdf.groupby(['Code departement'])['ndl'].count()
francexpr.choropleth(geo_data=geo, key_on="feature.properties.code", data=table,columns=['Code departement','ndl']
,fill_color="Spectral")
francexpr
df=pd.read_csv("data utilisable.csv")
#convertir le type de Valeur fonciere de str --> float64
import locale
locale.setlocale(locale.LC_ALL, 'nl_NL')
df['Valeur fonciere']=df['Valeur fonciere'].apply(lambda x : locale.atof(str(x)))
#
temp=df.groupby("Type de voie")['Valeur fonciere','Surface terrain'].sum().sort_values(by='Valeur fonciere',ascending=False)
temp["voie"]=temp.index
valeur_fonciere_total=temp["Valeur fonciere"].sum()
print(f"la valeur fonciere totale échangé cette année est {valeur_fonciere_total} euros")
temp["valeur fonciere %"]=temp["Valeur fonciere"].apply(lambda x:(x/valeur_fonciere_total)*100)
temp
la valeur fonciere totale échangé cette année est 6278338964152.69 euros
| Valeur fonciere | Surface terrain | voie | valeur fonciere % | |
|---|---|---|---|---|
| Type de voie | ||||
| RUE | 4.092743e+12 | 614703414.0 | RUE | 6.518831e+01 |
| AV | 1.011733e+12 | 111497912.0 | AV | 1.611467e+01 |
| CHE | 2.414344e+11 | 116814304.0 | CHE | 3.845513e+00 |
| RTE | 2.191816e+11 | 144221264.0 | RTE | 3.491076e+00 |
| PL | 1.265968e+11 | 8361187.0 | PL | 2.016405e+00 |
| ... | ... | ... | ... | ... |
| BRE | 7.800000e+04 | 212.0 | BRE | 1.242367e-06 |
| COIN | 7.000000e+04 | 30.0 | COIN | 1.114945e-06 |
| HLG | 1.250000e+04 | 653.0 | HLG | 1.990972e-07 |
| ZUP | 3.000000e+00 | 1257.0 | ZUP | 4.778334e-11 |
| DEVI | 2.000000e+00 | 829.0 | DEVI | 3.185556e-11 |
135 rows × 4 columns
fig=px.bar(temp.head(6),
y='voie', x='valeur fonciere %', title = ' Valeur fonciere / type de voie',
text='valeur fonciere %', orientation='h', width=700, height=700 )
fig.update_traces(marker_color='red', opacity=0.6, textposition='outside')
fig.show()
temp=df.groupby("Type de voie")["Surface terrain",'Surface reelle bati'].sum()
temp=temp.reset_index(drop=False)
fig=px.treemap(temp,path=['Type de voie'], values="Surface terrain", height=700,title="Surface terrain en m² totale par type de voie", color_discrete_sequence = px.colors.qualitative.Dark2)
fig.data[0].textinfo = 'label+value+percent parent'
fig.show()
fig=px.treemap(temp,path=['Type de voie'], values="Surface reelle bati", height=700,title="Surface reelle bati en m² totale par type de voie", color_discrete_sequence = px.colors.qualitative.Dark2)
fig.data[0].textinfo = 'label+value+percent parent'
fig.show()
data = urlopen("https://france-geojson.gregoiredavid.fr/repo/departements.geojson").read()
geo = json.loads(data.decode('utf-8'))
dm = pd.read_csv('data.csv')
dm = dm.drop(['Unnamed: 0'],axis=1)
dm["Date mutation"] = dm["Date mutation"].astype('datetime64[ns]')
dm["Code departement"] = dm["Code departement"].astype(str)
/Users/steven/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3444: DtypeWarning: Columns (6) have mixed types.Specify dtype option on import or set low_memory=False. exec(code_obj, self.user_global_ns, self.user_ns)
test = 100*(1 - (dm.groupby(['Code departement'])['Surface reelle bati'].mean() / dm.groupby(['Code departement'])['Surface terrain'].mean())).round(2)
test1 = pd.DataFrame(test)
test1.columns = ['Pourcentage de surface libre sur les terrains']
test1['Nombre de terrain'] = dm.groupby(['Code departement'])['Surface reelle bati'].count()
test1['Code departement'] = test1.index
cols = test1.columns.tolist()
cols = cols[-1:] + cols[:-1]
test1 = test1[cols]
test1 = test1.sort_values(by='Pourcentage de surface libre sur les terrains', ascending=True).reset_index(drop = True)
test1.style.background_gradient(cmap='Reds')
| Code departement | Pourcentage de surface libre sur les terrains | Nombre de terrain | |
|---|---|---|---|
| 0 | 94 | 93.000000 | 43856 |
| 1 | 59 | 94.000000 | 83808 |
| 2 | 7 | 94.000000 | 1854 |
| 3 | 90 | 95.000000 | 5264 |
| 4 | 62 | 95.000000 | 31528 |
| 5 | 78 | 95.000000 | 39490 |
| 6 | 77 | 95.000000 | 53824 |
| 7 | 91 | 95.000000 | 42343 |
| 8 | 73 | 95.000000 | 25200 |
| 9 | 56 | 96.000000 | 23574 |
| 10 | 75 | 96.000000 | 79761 |
| 11 | 33 | 96.000000 | 62958 |
| 12 | 1 | 96.000000 | 16526 |
| 13 | 69 | 96.000000 | 62489 |
| 14 | 42 | 96.000000 | 34887 |
| 15 | 43 | 96.000000 | 3950 |
| 16 | 44 | 96.000000 | 56464 |
| 17 | 80 | 96.000000 | 16131 |
| 18 | 29 | 96.000000 | 14786 |
| 19 | 54 | 96.000000 | 27485 |
| 20 | 27 | 96.000000 | 17070 |
| 21 | 26 | 96.000000 | 15091 |
| 22 | 95 | 96.000000 | 37648 |
| 23 | 22 | 96.000000 | 25985 |
| 24 | 85 | 96.000000 | 32417 |
| 25 | 12 | 96.000000 | 3822 |
| 26 | 88 | 96.000000 | 13978 |
| 27 | 93 | 96.000000 | 32277 |
| 28 | 81 | 96.000000 | 6646 |
| 29 | 9 | 96.000000 | 6998 |
| 30 | 17 | 96.000000 | 16471 |
| 31 | 971 | 97.000000 | 1858 |
| 32 | 6 | 97.000000 | 66308 |
| 33 | 60 | 97.000000 | 25673 |
| 34 | 92 | 97.000000 | 54566 |
| 35 | 64 | 97.000000 | 19930 |
| 36 | 70 | 97.000000 | 6058 |
| 37 | 71 | 97.000000 | 20181 |
| 38 | 72 | 97.000000 | 25222 |
| 39 | 74 | 97.000000 | 16574 |
| 40 | 76 | 97.000000 | 28620 |
| 41 | 84 | 97.000000 | 20536 |
| 42 | 83 | 97.000000 | 32885 |
| 43 | 82 | 97.000000 | 8407 |
| 44 | 65 | 97.000000 | 8775 |
| 45 | 79 | 97.000000 | 15412 |
| 46 | 52 | 97.000000 | 4583 |
| 47 | 31 | 97.000000 | 28058 |
| 48 | 11 | 97.000000 | 13343 |
| 49 | 13 | 97.000000 | 38418 |
| 50 | 16 | 97.000000 | 15091 |
| 51 | 19 | 97.000000 | 10376 |
| 52 | 2 | 97.000000 | 18689 |
| 53 | 24 | 97.000000 | 17548 |
| 54 | 30 | 97.000000 | 34727 |
| 55 | 50 | 97.000000 | 8728 |
| 56 | 34 | 97.000000 | 46719 |
| 57 | 35 | 97.000000 | 40425 |
| 58 | 37 | 97.000000 | 16639 |
| 59 | 38 | 97.000000 | 22499 |
| 60 | 4 | 97.000000 | 3623 |
| 61 | 974 | 97.000000 | 7380 |
| 62 | 40 | 97.000000 | 17787 |
| 63 | 41 | 97.000000 | 15147 |
| 64 | 49 | 97.000000 | 26931 |
| 65 | 46 | 97.000000 | 2668 |
| 66 | 5 | 97.000000 | 2706 |
| 67 | 86 | 98.000000 | 16742 |
| 68 | 87 | 98.000000 | 16712 |
| 69 | 89 | 98.000000 | 16205 |
| 70 | 55 | 98.000000 | 5735 |
| 71 | 18 | 98.000000 | 13499 |
| 72 | 66 | 98.000000 | 6040 |
| 73 | 14 | 98.000000 | 20559 |
| 74 | 53 | 98.000000 | 12293 |
| 75 | 973 | 98.000000 | 2757 |
| 76 | 10 | 98.000000 | 11465 |
| 77 | 972 | 98.000000 | 4574 |
| 78 | 21 | 98.000000 | 9537 |
| 79 | 15 | 98.000000 | 5482 |
| 80 | 58 | 98.000000 | 9269 |
| 81 | 28 | 98.000000 | 18384 |
| 82 | 2A | 98.000000 | 3360 |
| 83 | 8 | 98.000000 | 7890 |
| 84 | 2B | 98.000000 | 1173 |
| 85 | 3 | 98.000000 | 12712 |
| 86 | 48 | 98.000000 | 1387 |
| 87 | 47 | 98.000000 | 11067 |
| 88 | 32 | 98.000000 | 6217 |
| 89 | 61 | 98.000000 | 8988 |
| 90 | 36 | 98.000000 | 6999 |
| 91 | 45 | 98.000000 | 21332 |
| 92 | 63 | 98.000000 | 3872 |
| 93 | 39 | 98.000000 | 29995 |
| 94 | 23 | 98.000000 | 4509 |
| 95 | 51 | 98.000000 | 21966 |
| 96 | 25 | 99.000000 | 23661 |
fg = dm.groupby(['Code departement','Date mutation'])['Surface terrain'].sum()
fg = fg.reset_index()
fg
fig = px.bar(fg, x="Date mutation", y="Surface terrain", color='Code departement', orientation='v', height=600,
title='Surface par date et le département', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()
##On peut voir les pics de ventes en été
fig = px.treemap(fg.sort_values(by='Surface terrain' ,ascending=False).reset_index(drop=True), path=["Code departement"], values="Surface terrain", height=700,title='Surface/Departement',color_discrete_sequence = px.colors.qualitative.Prism)
fig.data[0].textinfo = 'label+text+value'
fig.show()
fg = dm
fg.dropna(subset = ["Nature culture"], inplace=True)
fg = fg.groupby(['Date mutation'])['Nature culture'].count()
fg = fg.reset_index()
fg
fig = px.bar(fg, x="Date mutation", y="Nature culture", color='Nature culture', orientation='v', height=600,
title='Vente de type culture par date', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()
fg = dm
fg.dropna(subset = ["Nature culture"], inplace=True)
fg["Valeur fonciere"] = fg["Valeur fonciere"].astype(str)
fg["Valeur fonciere"] = fg["Valeur fonciere"].apply(lambda x : float(x.replace(',','.')))
fg = fg.groupby(['Nature culture','Date mutation'])['Valeur fonciere'].sum()
fg = fg.reset_index()
fig = px.bar(fg, x="Date mutation", y="Valeur fonciere", color='Nature culture', orientation='v', height=600,
title='Investiment de type nature culture par date', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()
francecoord=folium.Map(location=[48.856578,2.351828])
table=dm.groupby(['Code departement'])['Nature mutation'].count()
francecoord.choropleth(geo_data=geo, key_on="feature.properties.code", data=table,columns=['Code departement','Nature mutation']
,fill_color="Spectral")
francecoord
/Users/steven/opt/anaconda3/lib/python3.9/site-packages/folium/folium.py:409: FutureWarning: The choropleth method has been deprecated. Instead use the new Choropleth class, which has the same arguments. See the example notebook 'GeoJSON_and_choropleth' for how to do this.